packages <- c(
"CIMseq", "CIMseq.data", "printr", "ggthemes", "tidyverse"
)
purrr::walk(packages, library, character.only = TRUE)
rm(packages)
##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
#rename classes
renameClasses <- function(class) {
case_when(
class == "0" ~ "C.Stem.proximal",
class == "1" ~ "C.Goblet",
class == "2" ~ "SI.Stem",
class == "3" ~ "C.Stem.distal",
class == "4" ~ "SI.Goblet.Paneth",
class == "5" ~ "Tufft",
class == "6" ~ "Enteroendocrine",
class == "7" ~ "Blood",
TRUE ~ "error"
)
}
c.order <- c(
"C.Stem.proximal", "C.Goblet", "C.Stem.distal", "Tufft", "Enteroendocrine",
"Blood", "SI.Stem", "SI.Goblet.Paneth"
)
getData(cObjSng, "classification") <- renameClasses(getData(cObjSng, "classification"))
fractions <- getData(sObj, "fractions")
colnames(fractions) <- renameClasses(colnames(fractions))
sObj@fractions <- fractions
Fig 1: Classes
plotUnsupervisedClass(cObjSng, cObjMul)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

Fig 2: Cell type gene expression
plotUnsupervisedMarkers(
cObjSng, cObjMul,
c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1", "Alpi", "Lyz1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

Fig 3: Cell cycle and architecture marker
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Mki67"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Plet1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Hoxb13"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

Fig 4: Plates
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = unique_key)) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

Fig 5: Mice
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
mutate(subject_id = as.character(subject_id)) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_id), alpha = 0.3) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

Fig 6: Age
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
mutate(subject_age = as.character(subject_age)) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_age)) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...

Fig 7: Classes per source
tibble(
sample = colnames(getData(cObjSng, "counts")),
class = getData(cObjSng, "classification")
) %>%
mutate(source = if_else(
str_detect(sample, "SRR654") | str_detect(sample, "SRR510"),
"External", "Enge"
)) %>%
group_by(source, class) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(source) %>%
mutate(`%` = n / sum(n) * 100) %>%
ggplot() +
geom_bar(aes(source, `%`, fill = source), stat = "identity", position = position_dodge(width = 1)) +
facet_wrap(~class, scales = "free") +
labs(x = "Source", y = "% of dataset") +
theme_bw() +
theme(
legend.position = "top",
axis.title.x = element_blank()
) +
guides(fill = FALSE)

Fig 8: Source overlay
getData(cObjSng, "dim.red") %>%
matrix_to_tibble("sample") %>%
setNames(c("sample", "UMAP.dim1", "UMAP.dim2")) %>%
mutate(source = case_when(
str_detect(sample, "SRR654") ~ "Tabula Muris",
str_detect(sample, "SRR510") ~ "Regev",
TRUE ~ "Enge"
)) %>%
sample_n(nrow(.), FALSE) %>%
ggplot() +
geom_point(aes(UMAP.dim1, UMAP.dim2, colour = source), alpha = 0.75) +
theme_few()

Fig 9: Class mean correlation
averageGroupExpression <- function(exp, classes) {
c <- unique(classes)
means <- purrr::map_dfc(c, function(x) {
data.frame(rowMeans(exp[, classes == x]))
})
means <- as.matrix(means)
colnames(means) <- c
return(means)
}
av.exp <- averageGroupExpression(
getData(cObjSng, "counts.cpm")[getData(cObjMul, "features"), ],
getData(cObjSng, "classification")
)
p.cor <- cor(av.exp, method = "pearson")
p.cor %>%
matrix_to_tibble("to") %>%
gather(from, cor, -to) %>%
mutate(
from = parse_factor(from, levels = c.order),
to = parse_factor(to, levels = c.order)
) %>%
ggplot() +
geom_tile(aes(from, to, fill = cor)) +
scale_fill_viridis_c() +
theme_few() +
theme(
axis.text.x = element_text(angle = 90),
axis.title = element_blank()
) +
guides(fill = guide_colorbar(title = "Pearson's\ncorrelation"))

Fig 10: Connections per multiplet
adj <- adjustFractions(cObjSng.enge, cObjMul, sObj, theoretical.max = 4)
table(apply(adj, 1, sum)) / nrow(adj) * 100
| 5.833333 |
34.16667 |
40 |
12.5 |
7.5 |
rowSums(adj) %>%
tibble(
sample = names(.),
connections = .
) %>%
inner_join(MGA.Meta, by = "sample") %>%
count(sub_tissue, connections)
| colon |
0 |
5 |
| colon |
1 |
29 |
| colon |
2 |
37 |
| colon |
3 |
13 |
| colon |
4 |
9 |
| small_intestine |
0 |
2 |
| small_intestine |
1 |
12 |
| small_intestine |
2 |
11 |
| small_intestine |
3 |
2 |
Fig 11: Fraction histogram
fractions <- getData(sObj, "fractions")
tibble(fractions = c(fractions)) %>%
ggplot() +
geom_histogram(aes(fractions), binwidth = 0.01) +
theme_bw()

Fig 12: Detected cell types vs. cost
tibble(
nCellTypes = apply(adj, 1, sum),
cost = getData(sObj, "costs")
) %>%
ggplot() +
geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
theme_bw()

Fig 13: Estimated cell numbers vs. cost
tibble(
sample = names(getData(sObj, "costs")),
cost = unname(getData(sObj, "costs"))
) %>%
inner_join(
select(estimateCells(cObjSng.enge, cObjMul), sample, estimatedCellNumber),
by = "sample"
) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number"
#breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
) +
theme_bw()

Fig 14: Estimated cell number vs. Detected cell number
ercc <- filter(estimateCells(cObjSng.enge, cObjMul), sampleType == "Multiplet")
nConnections <- rowSums(adj)
nConnections %>%
tibble(
sample = names(.),
detectedConnections = .
) %>%
inner_join(ercc) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(ercc$estimatedCellNumber))
) +
scale_y_continuous(
name = "Detected cell number",
breaks = 0:max(round(nConnections))
) +
theme_bw()
## Joining, by = "sample"

Fig 15: Detected cell number vs. Total counts
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.counts = colSums(getData(cObjMul, "counts"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total counts") +
theme_bw()

Fig 16: Detected cell number vs. Total ERCC counts
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.ercc = colSums(getData(cObjMul, "counts.ercc"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total ERCC counts") +
theme_bw()

Fig 17: Connections
plotSwarmCircos(sObj, cObjSng.enge, cObjMul, classOrder = c.order)

Fig 18: Filtered
Only detected duplicates and triplicates.
Only ERCC estimated cell number <= 4.
Weight cutoff = 5.
adj <- adjustFractions(cObjSng.enge, cObjMul, sObj, binary = TRUE)
samples <- rownames(adj)
rs <- rowSums(adj)
keep1 <- rs == 2 | rs == 3
keep2 <- samples %in% filter(estimateCells(cObjSng, cObjMul), estimatedCellNumber <= 4)$sample
plotSwarmCircos(
filterSwarm(sObj, keep1 & keep2), cObjSng.enge, cObjMul, weightCut = 5,
classOrder = c.order)